__author__ = 'maes_leung'
from bs4 import BeautifulSoup

import urllib.request
import urllib.parse
import re
import http.cookiejar

def getHtml(url):
    """Fetch *url* with browser-like headers and return the body as text.

    A fresh cookie-aware opener is built on every call and installed as the
    process-wide default used by ``urllib.request.urlopen``.

    Args:
        url: Address to fetch (a URL string or ``urllib.request.Request``).

    Returns:
        The response body decoded with the charset declared in the
        response headers, or as UTF-8 when no usable charset is declared.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    cj = http.cookiejar.CookieJar()  # collects cookies set by the server
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    # Headers sent with every request so it looks like a desktop browser.
    opener.addheaders = [('User-Agent',
                          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'),
                         ('Cookie', '2c7505bca2e54d1e85df92d947f2cc5a')]
    # Kept so existing callers relying on the globally installed opener
    # continue to get these headers from plain urlopen() calls.
    urllib.request.install_opener(opener)

    # Close the response deterministically instead of leaking the
    # connection until the garbage collector gets to it.
    with opener.open(url) as response:
        html_bytes = response.read()
        charset = response.headers.get_content_charset() or 'utf-8'

    try:
        return html_bytes.decode(charset)
    except LookupError:
        # The server declared a charset Python does not know; fall back to
        # UTF-8, which is what this function has always assumed.
        return html_bytes.decode('utf-8')

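# Channel list pages:
# General (综合):                      http://www.acfun.tv/v/list110/index.htm
# Work & Relationships (工作·情感):    http://www.acfun.tv/v/list73/index.htm
# Animation Culture (动画文化):        http://www.acfun.tv/v/list74/index.htm
# Manga & Light Novels (漫画·轻小说):  http://www.acfun.tv/v/list75/index.htm
# Games (游戏):                        http://www.acfun.tv/v/list74/index.htm
#   NOTE(review): the Games URL is identical to Animation Culture (list74) --
#   probably a copy-paste slip; confirm the correct list id.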
# Page to scrape: the list110 channel index.
tarUrl = "http://www.acfun.tv/v/list110/index.htm"
html_doc = getHtml(tarUrl)
soup = BeautifulSoup(html_doc, 'html.parser')

# Every <div class="block"> on the page is a candidate section.
block = soup.find_all('div', attrs={"class": "block"})

for b in block:
    # Only sections whose markup mentions "今日最热" are listed.
    if '今日最热' not in str(b):
        continue
    for a in b.find_all('a'):
        aid = a.get('data-aid')
        title = a.get('title')
        # Anchors lacking either attribute are skipped; indexing them
        # directly would raise KeyError and abort the whole listing.
        if aid is None or title is None:
            continue
        print("id:" + aid +
              "    title:" + title)